import numpy as np
from tensorflow.keras.datasets import imdb

# Load the IMDB dataset, keeping only the 10,000 most frequently occurring
# words (num_words=10000). tr_* is the training split and te_* the test split.
(tr_x, tr_y), (te_x, te_y) = imdb.load_data(num_words=10000)

def vectorize_sequences(sequences, dimension=10000):
    """Multi-hot encode sequences of integer indices into a 2-D array.

    Args:
        sequences: Sized collection (``len()`` is called on it) whose items
            are iterables of integer indices. Every index must be a valid
            position along an axis of length ``dimension``.
        dimension: Number of columns in the output, i.e. the width of each
            encoded row.

    Returns:
        A NumPy array of shape ``(len(sequences), dimension)`` with the
        default ``np.zeros`` dtype. Entry ``[i, j]`` is 1.0 when ``j`` occurs
        in ``sequences[i]`` and 0.0 otherwise; repeated indices still yield
        a single 1.0.

    Raises:
        IndexError: If an index is out of bounds for ``dimension``.
    """
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        # Set every listed column of row i in one integer-array assignment
        # instead of a per-element Python loop. list() keeps support for any
        # iterable row (tuple, generator, array), as the original loop had.
        results[i, list(sequence)] = 1.0
    return results


# Encode the training split first, then the test split, with
# vectorize_sequences (default dimension).
tr_x_vec, te_x_vec = (vectorize_sequences(split) for split in (tr_x, te_x))